
import json
import random
import chardet
from urllib.request import Request
import urllib.parse
from urllib.request import urlretrieve
from urllib.request import urlopen
import sys

# Pool of browser User-Agent strings; getMMData picks one at random
# (random.choice) for the User-Agent header of each request it sends.
user_agent = [
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
]

def getMMData(url,currentPage=0):
    """Fetch one page of search results from *url* and return the result list.

    The search form is URL-encoded and sent as the request body (so the
    request is a POST) together with a User-Agent header picked at random
    from ``user_agent``. The response body is decoded using the encoding
    guessed by ``chardet`` and parsed as JSON.

    Args:
        url: Search endpoint the form is posted to.
        currentPage: Value sent in the form's ``currentPage`` field.

    Returns:
        The object stored under ``['data']['searchDOList']`` in the parsed
        JSON response.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        ValueError: If the response body is not valid JSON
            (``json.JSONDecodeError`` is a subclass).
        KeyError: If the response lacks ``data`` or ``searchDOList``.
    """
    formdata = {
        'q':'',
        'viewFlag':'A',
        'sortType':'default',
        'searchStyle':'',
        'searchRegion':'city:',
        'searchFansNum':'',
        'currentPage': currentPage,
        'pageSize': '100'
    }
    body = urllib.parse.urlencode(formdata).encode('utf-8')
    headers = {'User-Agent':random.choice(user_agent)}
    request = Request(url,data=body,headers=headers)

    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(request) as response:
        raw = response.read()

    # chardet reports encoding=None when it cannot guess (e.g. empty body);
    # fall back to UTF-8 rather than crashing in bytes.decode(None).
    encoding = chardet.detect(raw)['encoding'] or 'utf-8'
    payload = json.loads(raw.decode(encoding))
    return payload['data']['searchDOList']

def getMMID(data):
    """Return a list with the ``userId`` value of each record in *data*.

    Records are visited in iteration order; each one must support
    ``record['userId']``.
    """
    return [record['userId'] for record in data]

if __name__ == '__main__':
    # Everything below runs only when the file is executed as a script.
    # The download loop used to sit outside this guard while reading `data`,
    # which is only bound inside it, so importing the module raised NameError.
    url = 'https://mm.taobao.com/tstar/search/tstar_model.do?_input_charset=utf-8'
    data = getMMData(url)
    ID = getMMID(data)

    # Album list URL pattern, kept for reference:
    # https://mm.taobao.com/self/album/open_album_list.htm?_charset=utf-8&user_id%20=XXXXXX

    # Info file: one text record per entry is appended. The `with` block
    # guarantees it is closed even if a download fails part-way through.
    with open('/Users/hulinhu/Desktop/HK/信息.txt','a',encoding='utf8') as f:
        for item in data:
            link = 'https:'+item['avatarUrl']
            city = item['city']
            height = item['height']
            weight = item['weight']
            name = item['realName']
            fname = item['realName']+"."+'jpg'

            # Text record. str() keeps this working if the API returns
            # numbers (or other non-string values) for any field.
            f.write('名字:'+str(name)+'\n')
            f.write('身高:'+str(height)+'\n')
            f.write('体重:'+str(weight)+'\n')
            f.write('城市:'+str(city)+'\n\n')

            # Image: download first so a failed request does not leave an
            # empty .jpg behind, and close the HTTP response when done.
            with urllib.request.urlopen(link) as response:
                content2 = response.read()

            # Image directory. Each image file is closed as soon as it is
            # written (previously only the last one was closed, and an empty
            # `data` made the final close() raise NameError).
            with open('/Users/hulinhu/Desktop/HK/%s'%fname,'wb') as file:
                file.write(content2)
            print(u"downloading : %s\n"%fname)

# for num in range(0, len(ID)):
#     html = 'https://mm.taobao.com/self/aiShow.htm?user_id='+str(ID[num])
#     print(html)

  